## [1] "/home/guanshim/Documents/gitlab/Cario_RNASeq_Microbiom_Inte/DataProcessed/Vgenes"

0.1 Data

## # A tibble: 6 x 47
##   IGHV1.18 IGHV1.2 IGHV1.24 IGHV1.3 IGHV3.15 IGHV3.21 IGHV3.23 IGHV3.30
##      <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1    1.13     1.37     2.91    2.50     5.17    1.19      3.38     4.20
## 2    0.856    1.56     3.18    2.64     4.74    1.38      3.61     4.46
## 3    0.729    1.94     3.17    2.55     5.17    1.14      4.14     3.79
## 4    0.848    1.16     3.46    2.09     4.73    1.52      2.84     3.74
## 5    0.832    2.09     3.01    2.39     5.15    0.985     4.26     4.12
## 6    1.11     1.75     3.65    2.34     5.38    1.02      3.89     3.05
##   IGHV3.33 IGHV3.48 IGHV3.49 IGHV3.53 IGHV3.7 IGHV3.72 IGHV3.73 IGHV3.74
##      <dbl>    <dbl>    <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>
## 1     4.90    1.13      2.10     2.02    6.23     3.12     1.26     3.54
## 2     5.50    1.52      2.20     1.74    6.38     3.17     1.23     3.27
## 3     5.30    1.49      1.87     1.64    6.20     3.87     1.61     2.75
## 4     5.07    0.947     1.75     1.50    5.58     2.40     1.06     2.61
## 5     5.37    1.05      1.62     1.99    6.47     2.80     1.47     3.64
## 6     5.53    1.75      2.16     1.58    6.89     3.40     1.31     3.69
##   IGHV4.34 IGHV4.39 IGHV4.59 IGHV7.81 IGKV1.39 IGKV1.5 IGKV1D.39 IGKV2.28
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>   <dbl>     <dbl>    <dbl>
## 1    1.08     0.906     1.37     4.80    0.796    1.65      1.22     1.79
## 2    1.05     0.881     1.34     4.28    0.773    2.01      1.19     1.52
## 3    1.26     0.750     2.01     4.52    1.01     1.86      1.05     1.94
## 4    1.14     0.742     1.40     4.40    0.886    1.64      1.26     1.66
## 5    1.38     0.854     1.15     4.06    0.641    2.26      1.14     1.54
## 6    0.943    0.936     1.59     4.59    1.14     1.87      1.21     1.74
##   IGKV2D.28 IGKV3.11 IGKV3.15 IGKV3.20 IGKV4.1 IGLV1.40 IGLV1.44 IGLV1.47
##       <dbl>    <dbl>    <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>
## 1      3.16     1.22     1.60     1.92    5.20    1.03      2.00    1.82 
## 2      1.58     1.19     1.54     2.43    4.61    1.00      1.95    0.970
## 3      1.38     1.05     1.11     1.78    5.11    1.31      2.32    0.836
## 4      1.49     1.45     1.21     1.87    4.68    0.867     2.28    1.07 
## 5      2.20     1.42     1.37     2.10    5.01    0.972     1.79    0.937
## 6      1.45     1.39     1.40     2.61    5.22    1.18      2.20    0.772
##   IGLV2.11 IGLV2.14 IGLV2.23 IGLV2.8 IGLV3.1 IGLV3.10 IGLV3.19 IGLV3.21
##      <dbl>    <dbl>    <dbl>   <dbl>   <dbl>    <dbl>    <dbl>    <dbl>
## 1     1.52     2.87    1.01     1.41    3.42     2.64    1.04      3.35
## 2     1.49     2.22    0.989    2.10    3.86     2.77    1.02      2.74
## 3     1.44     2.69    1.54     1.64    3.41     3.10    0.870     3.22
## 4     1.42     2.65    1.08     1.53    3.82     2.44    0.990     3.05
## 5     1.68     2.22    0.839    1.40    3.31     3.30    0.851     2.72
## 6     1.53     2.36    0.960    1.29    4.02     3.51    1.12      3.06
##   IGLV3.25 IGLV3.9 IGLV4.69 IGLV5.48 IGLV6.57 HIV   ID      
##      <dbl>   <dbl>    <dbl>    <dbl>    <dbl> <chr> <chr>   
## 1     1.32    1.63    0.744     1.30     6.20 no    MIHIV138
## 2     1.28    1.59    1.15      1.04     5.95 no    MIHIV178
## 3     1.80    1.74    0.608     1.14     6.69 no    MIHIV255
## 4     1.45    1.81    0.601     1.40     5.92 no    MIHIV278
## 5     1.59    1.78    1.16      1.29     6.60 no    MIHIV361
## 6     1.54    1.60    0.641     1.21     6.50 no    MIHIV404
## [1] 32 47
## # A tibble: 6 x 5
##   ID       HIV   iga_smu igk_smu igl_smu
##   <chr>    <chr>   <dbl>   <dbl>   <dbl>
## 1 MIHIV138 no       24.3   10.9     18.1
## 2 MIHIV178 no       24.9   11.2     13.6
## 3 MIHIV255 no       23.2   11.3     15  
## 4 MIHIV278 no       24.7   15.5     NA  
## 5 MIHIV361 no       22.3    9.22    NA  
## 6 MIHIV404 no       18.8   14.2     NA
## # A tibble: 6 x 36
##   Vsymbols   Num Type  Gene_ID           MIHIV138 MIHIV178 MIHIV255
##   <chr>    <dbl> <chr> <chr>                <dbl>    <dbl>    <dbl>
## 1 IGHV1-18     1 heavy ENSG00000211945.2     1.13    0.856    0.729
## 2 IGHV1-2      2 heavy ENSG00000211934.3     1.37    1.56     1.94 
## 3 IGHV1-24     3 heavy ENSG00000211950.2     2.91    3.18     3.17 
## 4 IGHV1-3      4 heavy ENSG00000211935.3     2.50    2.64     2.55 
## 5 IGHV3-15    19 heavy ENSG00000211943.2     5.17    4.74     5.17 
## 6 IGHV3-21    22 heavy ENSG00000211947.2     1.19    1.38     1.14 
##   MIHIV278 MIHIV361 MIHIV404 MIHIV493 MIHIV582 MIHIV708 MIHIV716 MIHIV914
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1    0.848    0.832     1.11     1.33    0.685    0.875     1.01    0.954
## 2    1.16     2.09      1.75     1.39    1.38     1.44      1.42    1.38 
## 3    3.46     3.01      3.65     3.86    3.69     3.46      3.34    2.72 
## 4    2.09     2.39      2.34     3.19    2.55     2.91      3.09    2.48 
## 5    4.73     5.15      5.38     5.75    4.79     5.34      5.92    4.72 
## 6    1.52     0.985     1.02     1.50    1.23     1.15      1.89    1.22 
##   MIHIV947 MIHIV972 MIHIV124 MIHIV132 MIHIV154 MIHIV188 MIHIV217 MIHIV286
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1    0.794    0.909    0.767     1.44     1.21     1.05    0.913    0.824
## 2    1.43     1.33     1.22      2.26     1.31     1.88    1.83     1.67 
## 3    3.27     2.95     2.94      3.97     4.36     3.27    3.10     3.05 
## 4    3.38     2.39     2.21      2.36     2.49     2.30    3.57     3.30 
## 5    5.15     5.11     4.88      6.19     6.35     5.63    5.33     5.71 
## 6    1.26     1.48     1.35      1.26     1.02     1.11    1.57     1.12 
##   MIHIV307 MIHIV323 MIHIV391 MIHIV428 MIHIV594 MIHIV622 MIHIV648 MIHIV683
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1    0.870     1.41    0.954    0.850    0.750     1.18     1.58    0.858
## 2    1.17      2.26    1.56     1.91     1.21      1.62     1.17    1.34 
## 3    3.77      4.66    3.55     4.12     3.71      3.27     5.02    3.03 
## 4    3.15      4.49    2.99     2.79     2.73      3.43     3.99    3.50 
## 5    5.64      6.55    5.55     5.79     4.62      5.98     6.45    6.64 
## 6    1.48      2.50    1.24     1.37     1.01      1.57     1.93    1.16 
##   MIHIV819 MIHIV825 MIHIV839 MIHIV965 MIHIV998
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1    0.929    0.802     2.28    0.908     1.29
## 2    1.64     1.27      2.63    1.66      1.35
## 3    3.05     3.21      4.00    3.19      3.56
## 4    3.36     3.09      3.92    3.36      3.05
## 5    5.27     4.89      6.68    6.27      6.31
## 6    1.21     1.10      2.52    1.22      1.33
## [1] 45 36
## # A tibble: 6 x 6
##   ID       Gender   Age HIV   `CD4 count (cells/ul)` `IL-6 (pg/ml)`
##   <chr>    <chr>  <dbl> <chr>                  <dbl>          <dbl>
## 1 MIHIV138 M         29 no                       728           2.16
## 2 MIHIV178 M         33 no                       736           0.51
## 3 MIHIV255 M         34 no                       588           0.69
## 4 MIHIV278 M         23 no                       532           0.73
## 5 MIHIV361 F         33 no                       720           0.19
## 6 MIHIV404 F         29 no                      1071           1.35
##  [1] "CD4 count (cells/ul)"                        
##  [2] "IL-6 (pg/ml)"                                
##  [3] "CRP (mg/ml)"                                 
##  [4] "iFABP (pg/ml)"                               
##  [5] "sCD27 (U/ml)"                                
##  [6] "CD14 (ng/ml)"                                
##  [7] "LPS (pg/ml)"                                 
##  [8] "LTA\n(OD)"                                   
##  [9] "CD38+ HLA-DR+ CD4 T cells (% of CD4 T cells)"
## [10] "CD38+ HLA-DR+ CD8 T cells (% of CD8 T cells)"
## [11] "CD4 T cells (% viable)"                      
## [12] "CD8 T cells (% viable)"                      
## [13] "Tissue HIV RNA (per CD4 T cell)"             
## [14] "Plasma VL"
## # A tibble: 6 x 2
##   `Plasma VL` Actinobacteria
##         <dbl>          <dbl>
## 1          NA        0.00829
## 2          NA        0.0330 
## 3          NA        0.00864
## 4          NA        0.00924
## 5          NA        0.00361
## 6          NA        0.0229
##  [1] "MIHIV138" "MIHIV178" "MIHIV255" "MIHIV278" "MIHIV361" "MIHIV404"
##  [7] "MIHIV493" "MIHIV582" "MIHIV708" "MIHIV716" "MIHIV914" "MIHIV947"
## [13] "MIHIV972" "MIHIV124" "MIHIV132" "MIHIV154" "MIHIV188" "MIHIV217"
## [19] "MIHIV286" "MIHIV307" "MIHIV323" "MIHIV391" "MIHIV428" "MIHIV594"
## [25] "MIHIV622" "MIHIV648" "MIHIV683" "MIHIV819" "MIHIV825" "MIHIV839"
## [31] "MIHIV965" "MIHIV998"

0.2 Correlation between Counts and Clinical parameter, Microbiome: No interaction

# rnaseq clinical parameters
clinical_names <- colnames(Clin)[5:18]
clinical_names
##  [1] "CD4 count (cells/ul)"                        
##  [2] "IL-6 (pg/ml)"                                
##  [3] "CRP (mg/ml)"                                 
##  [4] "iFABP (pg/ml)"                               
##  [5] "sCD27 (U/ml)"                                
##  [6] "CD14 (ng/ml)"                                
##  [7] "LPS (pg/ml)"                                 
##  [8] "LTA\n(OD)"                                   
##  [9] "CD38+ HLA-DR+ CD4 T cells (% of CD4 T cells)"
## [10] "CD38+ HLA-DR+ CD8 T cells (% of CD8 T cells)"
## [11] "CD4 T cells (% viable)"                      
## [12] "CD8 T cells (% viable)"                      
## [13] "Tissue HIV RNA (per CD4 T cell)"             
## [14] "Plasma VL"
n_clinical <- length(clinical_names)
############## rna seq ###### both
clinical_sum <- matrix(NA, 0, 7)
for (i in clinical_names[1:12]) {
    output = hivornot(rnaseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "RNAseq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes RNAseq")
Vgenes RNAseq
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
CD4 T cells (% viable) All Participants IGKV4.1 0.03217 0.0007148 -2.563e-02 32
CD4 T cells (% viable) All Participants IGLV1.44 0.04966 0.0022069 -1.793e-02 32
CD8 T cells (% viable) All Participants IGKV4.1 0.03568 0.0007929 1.901e-02 32
############## miseq ###### both
clinical_sum <- matrix(NA, 0, 7)
for (i in clinical_names[1:12]) {
    output = hivornot(miseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "MiSeq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes MiSeq")
Vgenes MiSeq
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
CD4 count (cells/ul) All Participants iga_smu 0.03359 0.03359 4.928e-03 32
CD4 count (cells/ul) All Participants igl_smu 0.03720 0.03720 6.823e-03 32
sCD27 (U/ml) All Participants iga_smu 0.001858 0.001858 -9.638e-02 32
CD14 (ng/ml) All Participants igl_smu 0.01366 0.01366 -4.469e-03 30
LPS (pg/ml) All Participants iga_smu 0.01513 0.01513 -2.757e-01 30
CD38+ HLA-DR+ CD4 T cells (% of CD4 T cells) All Participants iga_smu 0.02222 0.02222 -5.492e-01 31
CD38+ HLA-DR+ CD8 T cells (% of CD8 T cells) All Participants iga_smu 0.0433 0.0433 -1.044e-01 31
CD4 T cells (% viable) All Participants iga_smu 0.001763 0.001763 1.453e-01 32
CD8 T cells (% viable) All Participants iga_smu 0.001005 0.001005 -1.131e-01 32
CD8 T cells (% viable) All Participants igl_smu 0.000425 0.000425 -1.511e-01 32
# classes of microbiome
phylum <- colnames(Clin)[1:9 + 18]
family <- colnames(Clin)[10:20 + 18]
genus <- colnames(Clin)[21:28 + 18]
species <- colnames(Clin)[29:44 + 18]

############## rna seq ###### both phylum
clinical_sum <- matrix(NA, 0, 7)
paste("phylum")
## [1] "phylum"
for (i in phylum) {
    output = hivornot(rnaseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "RNAseq")
    clinical_sum = rbind(clinical_sum, sum_a)
}
kable(clinical_sum, caption = "Vgenes RNAseq phylum")

Table: Vgenes RNAseq phylum

## family
clinical_sum <- matrix(NA, 0, 7)
paste("family")
## [1] "family"
for (i in family) {
    output = hivornot(rnaseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "RNAseq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes RNAseq family")
Vgenes RNAseq family
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
Prevotellaceae All Participants IGKV1.39 0.01111 0.0002468 8.729e-01 27
Brucellaceae All Participants IGLV1.40 0.043832 0.0019481 3.825e+02 27
Brucellaceae All Participants IGLV3.19 0.007625 0.0001694 5.264e+02 27
## genus
clinical_sum <- matrix(NA, 0, 7)
paste("genus")
## [1] "genus"
for (i in genus) {
    output = hivornot(rnaseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "RNAseq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes RNAseq genus")
Vgenes RNAseq genus
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
Prevotella All Participants IGKV1.39 0.01966 0.0004368 8.722e-01 27
## species
clinical_sum <- matrix(NA, 0, 7)
paste("species")
## [1] "species"
for (i in species) {
    output = hivornot(rnaseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "RNAseq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes RNAseq species")
Vgenes RNAseq species
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
P. stercorea All Participants IGHV3.30 0.01746 0.0010978 9.537e+00 27
P. stercorea All Participants IGKV1.39 0.01746 0.0006984 3.173e+00 27
P. stercorea All Participants IGLV3.21 0.01746 0.0011640 5.723e+00 27
P. oris All Participants IGHV3.73 0.008818 0.000196 8.854e+00 27
################## miseq #####################

clinical_sum <- matrix(NA, 0, 7)
paste("phylum")
## [1] "phylum"
for (i in phylum) {
    output = hivornot(miseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "MiSeq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes miseq phylum")
Vgenes miseq phylum
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
Cyanobacteria All Participants iga_smu 0.00970 0.00970 1.568e+02 27
Cyanobacteria All Participants igl_smu 0.01856 0.01856 1.580e+02 27
Firmicutes All Participants iga_smu 0.008763 0.008763 8.954e+00 27
Proteobacteria All Participants iga_smu 0.03297 0.03297 -8.501e+00 27
## family
clinical_sum <- matrix(NA, 0, 7)
paste("family")
## [1] "family"
for (i in family) {
    output = hivornot(miseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "MiSeq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes miseq family")
Vgenes miseq family
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
Lachnospiraceae All Participants iga_smu 0.04007 0.04007 1.007e+01 27
## genus
clinical_sum <- matrix(NA, 0, 7)
paste("genus")
## [1] "genus"
for (i in genus) {
    output = hivornot(miseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "MiSeq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes RNAseq genus")
Vgenes RNAseq genus
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
Blautia All Participants iga_smu 0.03105 0.03105 7.152e+01 27
Blautia All Participants igl_smu 0.03470 0.03470 1.236e+02 27
## species
clinical_sum <- matrix(NA, 0, 7)
paste("species")
## [1] "species"
for (i in species) {
    output = hivornot(miseq, Clin, i)
    # summary table
    sum_a = output_sum(output, "MiSeq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes RNAseq species")
Vgenes RNAseq species
Parameter HIV Status Gene Adjusted P Raw P Slope Sample Size
Bacteroides_dorei All Participants iga_smu 0.02683 0.02683 1.283e+02 27
Bacteroides_dorei All Participants igk_smu 0.04955 0.04955 9.830e+01 27
Bacteroides_dorei All Participants igl_smu 0.03370 0.03370 1.368e+02 27
Blautia_luti All Participants iga_smu 0.03813 0.03813 1.417e+02 27
Blautia_luti All Participants igl_smu 0.03847 0.03847 2.128e+02 27
Blautia_schinkii All Participants iga_smu 0.001614 0.001614 3.064e+02 27
Ruminococcus_bromii All Participants iga_smu 0.01074 0.01074 6.471e+01 27

0.3 Correlation between Counts and Clinical parameter, Microbiome: interaction with HIV

########################## linear regression ############################## equal
########################## length of outcomes and covariates
lm_Reg_Interaction <- function(gene_matrix, Clin, clin_var_name, 
    inter_term_name) {
    ######################## If there are non-numeric columns ###############
    gene_matrix = gene_matrix %>% select(-HIV, -ID)
    ###################### Binary Interaction matrix
    gene_matrix = as.matrix(gene_matrix)
    # get names ready
    genelistname = base::colnames(gene_matrix)
    ## number of gene to test, also the number of multiple test
    n_gene = ncol(gene_matrix)
    clinical_variable = as.matrix(Clin[, colnames(Clin) == clin_var_name])
    ####### the interaction variable , here a binary variable
    interaction_var = as.matrix(Clin[, colnames(Clin) == inter_term_name])
    ref = levels(factor(interaction_var))[1]
    non_ref = levels(factor(interaction_var))[2]
    interaction_var_ = factor(interaction_var, levels = c(non_ref, 
        ref))
    ## outcome lm
    outcome_lm = lapply(1:n_gene, function(i) {
        lm = lm(gene_matrix[, i] ~ clinical_variable * interaction_var + 
            Clin$Age + Clin$Gender)
        ## relevel to get estimate beta1 + beta3
        ## https://stats.stackexchange.com/questions/248248/test-whether-the-sum-of-two-coefficients-is-significantly-different-that-zero-in
        lm_relevel = lm(gene_matrix[, i] ~ clinical_variable * 
            interaction_var_ + Clin$Age + Clin$Gender)
        ##### interaction term is always the last row: 6 ######## -3:
        ##### means no t-statistics, -2: means no Std.Error term
        ##### #############
        coef = c(summary(lm)$coefficients[2, -c(2, 3)], summary(lm_relevel)$coefficients[2, 
            -c(2, 3)], summary(lm)$coefficients[6, -c(2, 3)])
        return(coef)
    })
    outcome_lm = data.frame(matrix(unlist(outcome_lm), ncol = 6, 
        byrow = TRUE, dimnames = list(c(colnames(gene_matrix)), 
            c("slope_no", "p.value_no", "slope_hiv", "p.value_hiv", 
                "slope_diff", "p.value_diff"))))
    
    # adjusted p-value
    outcome_lm = outcome_lm %>% # https://cran.r-project.org/web/packages/mutoss/mutoss.pdf
    # The Benjamini-Liu (BL) step-down procedure
    dplyr::mutate(FDR_no = if (n_gene >= 17) {
        p.adjust(p.value_no, "BH", n_gene)
    } else if (n_gene <= 10) {
        p.value_no
    } else {
        BL(p.value_no, alpha = 0.05, silent = T)$adjPValues
    }, FDR_hiv = if (n_gene >= 17) {
        p.adjust(p.value_hiv, "BH", n_gene)
    } else if (n_gene <= 10) {
        p.value_hiv
    } else {
        BL(p.value_hiv, alpha = 0.05, silent = T)$adjPValues
    }, FDR_diff = if (n_gene >= 17) {
        p.adjust(p.value_diff, "BH", n_gene)
    } else if (n_gene <= 10) {
        p.value_diff
    } else {
        BL(p.value_diff, alpha = 0.05, silent = T)$adjPValues
    }, names = colnames(gene_matrix)) %>% dplyr::mutate(slope_no = round(slope_no, 
        10), slope_hiv = round(slope_hiv, 10), slope_diff = round(slope_diff, 
        10)) %>% mutate(sig = ifelse((FDR_diff < 0.05) | (FDR_hiv < 
        0.05), "Sig.", "Non")) %>% select(names, everything())
    # sort by p.value outcome_lm =
    # outcome_lm[order(outcome_lm$p.value), ]
    outcome_lm = data.frame(outcome_lm)
    
    ## sample size
    size = sum(!is.na(clinical_variable))
    ################### scatter plot ##########################
    sig = sum(outcome_lm$sig == "Sig.")
    if (sig > 0) {
        # get significant genes list
        sig_genes = outcome_lm %>% dplyr::filter(sig == "Sig.") %>% 
            select(names)
        sig_genes = as.vector(unlist(sig_genes))
        sig_genes_matrix = as.matrix(gene_matrix[, colnames(gene_matrix) %in% 
            sig_genes])
        ########## get the right sample size, missing values in clinical, and
        ########## miseq #########3 length(clinical_variable) =
        ########## max(nrow(sig_genes_matrix), length(clinical_variable) )
        ########## nrow(sig_genes_matrix) = max(nrow(sig_genes_matrix),
        ########## length(clinical_variable) )
        plot_data = cbind(sig_genes_matrix, clinical_variable, 
            interaction_var)
        colnames(plot_data) = c(sig_genes, clin_var_name, inter_term_name)
        plot_data = data.frame(plot_data) %>% na.omit()
        for (i in sig_genes) {
            ## aes
            x_clin = as.numeric(plot_data[, ncol(plot_data) - 
                1])
            y_gene = as.numeric(plot_data[, colnames(plot_data) == 
                i])
            plot_group = as.factor(plot_data[, ncol(plot_data)])
            ## with plotdata
            p = ggplot(mapping = aes(x = x_clin, y = y_gene, 
                color = plot_group, shape = plot_group)) + geom_point() + 
                geom_smooth(method = "lm", se = FALSE) + labs(title = paste(sep = " ", 
                i, "vs.", clin_var_name, "\n group by", inter_term_name), 
                x = clin_var_name, y = i) + scale_color_discrete(name = paste(inter_term_name, 
                " Status", sep = ""), breaks = c("no", "yes"), 
                labels = c("Healthy Control", paste(inter_term_name, 
                  "-infected", sep = ""))) + scale_shape_discrete(name = paste(inter_term_name, 
                " Status", sep = ""), breaks = c("no", "yes"), 
                labels = c("Healthy Control", paste(inter_term_name, 
                  "-infected", sep = ""))) + 
            theme_minimal()
            # 
            print(p)
            
        }
        
        
    } else {
    }
    
    
    ################## summary table
    return(list(results = outcome_lm, size = size, clinical = clin_var_name))
}

###################### output summary function ####################### output is
###################### the
output_sum <- function(output, genelist) {
    # genelist is the name of the genelist genelist: 'RNAseq' or
    # 'MiSeq' a list of two sublists
    clinical = output$clinical
    clinical = gsub("\n", "_", clinical, fixed = T)
    # find a good file name
    para_name = paste(unlist(strsplit(clinical, " ", fixed = T))[1:3], 
        collapse = "_")
    para_name1 = gsub("/", "_", para_name, fixed = T)
    size = output$size
    input = output$results
    # lm results
    sig = sum(input$sig == "Sig.")
    write.xlsx(input, paste("~/Documents/gitlab/Cario_RNASeq_Microbiom_Inte/DataProcessed/Vgenes_inter/", 
        sep = "", Sys.Date(), "_", para_name1, "_", genelist, 
        "_", "interaction.xlsx"), sheetName = paste(sep = "_", 
        genelist))
    
    
    
    if (sig > 0) {
        
        ############# summary table #####################
        genes = input$names[input$sig == "Sig."]
        fdr_no = format.pval(input$FDR_no[input$sig == "Sig."], 
            digits = 4, eps = 1e-04, scientific = F)
        fdr_hiv = format.pval(input$FDR_hiv[input$sig == "Sig."], 
            digits = 4, eps = 1e-04, scientific = F)
        fdr_diff = format.pval(input$FDR_diff[input$sig == "Sig."], 
            digits = 4, eps = 1e-04, scientific = F)
        # neg_log10p = -log10(input$p.value[input$sig == 'Sig.'])
        slope_no = format(input$slope_no[input$sig == "Sig."], 
            digits = 4, scientific = T)
        slope_hiv = format(input$slope_hiv[input$sig == "Sig."], 
            digits = 4, scientific = T)
        slope_diff = format(input$slope_diff[input$sig == "Sig."], 
            digits = 4, scientific = T)
        sum_a = cbind(clinical, genes, fdr_no, slope_no, fdr_hiv, 
            slope_hiv, fdr_diff, slope_diff, size)
        colnames(sum_a) = c("Parameter", "Gene", "Adjusted_P_Non", 
            "Slope_Non", "Adjusted_P_HIV", "Slope_HIV", "Adjusted_P_Diff", 
            "Slope_Diff", "Sample_Size")
        
        return(sum_a)
    } else {
    }
    
}

All results of RNAseq are negative.

############# with clinical viarable ################# rna seq ######
############# both
clinical_sum <- matrix(NA, 0, 9)
for (i in clinical_names[1:12]) {
    output = lm_Reg_Interaction(rnaseq, Clin, i, "HIV")
    # summary table
    sum_a = output_sum(output, "RNAseq")
    clinical_sum = rbind(clinical_sum, sum_a)
}
kable(clinical_sum, caption = "Vgenes RNAseq: Clinical Parameter and HIV Interaction")

Table: Vgenes RNAseq: Clinical Parameter and HIV Interaction

########## Miseq ################
clinical_sum <- matrix(NA, 0, 9)
for (i in clinical_names[1:12]) {
    output = lm_Reg_Interaction(miseq, Clin, i, "HIV")
    # summary table
    sum_a = output_sum(output, "MiSeq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = "Vgenes MiSeq: Clinical Parameter and HIV Interaction")
Vgenes MiSeq: Clinical Parameter and HIV Interaction
Parameter Gene Adjusted_P_Non Slope_Non Adjusted_P_HIV Slope_HIV Adjusted_P_Diff Slope_Diff Sample_Size
CD4 count (cells/ul) igl_smu 0.08993 -1.66e-02 0.07211 6.355e-03 0.031 2.295e-02 32
sCD27 (U/ml) iga_smu 0.4472 9.757e-02 0.03311 -7.209e-02 0.2038 -1.697e-01 32
CD4 T cells (% viable) igl_smu 0.02245 -2.828e-01 0.03657 4.712e-01 0.003171 7.54e-01 32
CD8 T cells (% viable) igl_smu 0.08258 -4.524e-01 0.02266 -1.552e-01 0.2616 2.972e-01 32
###################### with Microbiome ############ classes of microbiome
phylum <- colnames(Clin)[1:9 + 18]
family <- colnames(Clin)[10:20 + 18]
genus <- colnames(Clin)[21:28 + 18]
species <- colnames(Clin)[29:44 + 18]

####### RNA seq #################
for (j in c("phylum", "family", "genus", "species")) {
    clinical_sum <- matrix(NA, 0, 9)
    for (i in eval(as.name(j))) {
        output = lm_Reg_Interaction(rnaseq, Clin, i, "HIV")
        # summary table
        sum_a = output_sum(output, "RNAseq")
        clinical_sum = rbind(clinical_sum, sum_a)
    }
    print(kable(clinical_sum, caption = paste("Vgenes RNAseq: HIV Interaction with ", 
        sep = "", j)))
}

## 
## 
## Table: Vgenes RNAseq: HIV Interaction with phylum
## 
## Parameter      Gene       Adjusted_P_Non   Slope_Non    Adjusted_P_HIV   Slope_HIV   Adjusted_P_Diff   Slope_Diff   Sample_Size 
## -------------  ---------  ---------------  -----------  ---------------  ----------  ----------------  -----------  ------------
## Fusobacteria   IGKV1.39   0.9792           -4.334e+00   0.03555          9.494e+01   0.05633           9.927e+01    27          
## Tenericutes    IGHV4.39   0.9054           -4.584e+00   0.057447         9.071e+01   0.046978          9.530e+01    27          
## Tenericutes    IGKV3.20   0.7209           -1.507e+01   0.057447         1.026e+02   0.036412          1.177e+02    27          
## Tenericutes    IGLV2.14   0.8779           -1.095e+01   0.003595         1.829e+02   0.004242          1.939e+02    27          
## Tenericutes    IGLV3.19   0.9297           -1.377e+00   0.003595         1.149e+02   0.004242          1.163e+02    27          
## Tenericutes    IGLV6.57   0.7209           -2.309e+01   0.072953         1.427e+02   0.045772          1.658e+02    27

## 
## 
## Table: Vgenes RNAseq: HIV Interaction with family
## 
## Parameter             Gene       Adjusted_P_Non   Slope_Non    Adjusted_P_HIV   Slope_HIV    Adjusted_P_Diff   Slope_Diff   Sample_Size 
## --------------------  ---------  ---------------  -----------  ---------------  -----------  ----------------  -----------  ------------
## Christensenellaceae   IGHV1.18   0.9416           -8.314e-01   0.0005867        8.175e+01    0.0007508         8.258e+01    27          
## Christensenellaceae   IGHV1.2    0.9416           -2.905e+00   0.0061953        8.499e+01    0.0047417         8.789e+01    27          
## Christensenellaceae   IGHV1.3    0.9416           -5.191e+00   0.0350303        9.285e+01    0.0265921         9.805e+01    27          
## Christensenellaceae   IGHV3.21   0.9416           1.177e+00    0.0005867        1.036e+02    0.0007508         1.025e+02    27          
## Christensenellaceae   IGHV3.23   0.4511           -8.260e+00   0.0072158        1.059e+02    0.0047417         1.142e+02    27          
## Christensenellaceae   IGHV3.7    0.9416           -4.609e+00   0.0203242        1.338e+02    0.0190335         1.385e+02    27          
## Christensenellaceae   IGHV3.72   0.9416           -4.629e+00   0.0515613        8.809e+01    0.0450188         9.272e+01    27          
## Christensenellaceae   IGHV3.74   0.5995           -7.069e+00   0.0072158        1.073e+02    0.0047417         1.144e+02    27          
## Christensenellaceae   IGHV4.34   0.9971           4.918e-03    0.0203242        3.688e+01    0.0205160         3.688e+01    27          
## Christensenellaceae   IGHV4.39   0.9416           -9.569e-01   0.0237261        5.971e+01    0.0229241         6.067e+01    27          
## Christensenellaceae   IGKV1.5    0.9416           -2.375e+00   0.0274026        6.646e+01    0.0240120         6.883e+01    27          
## Christensenellaceae   IGKV3.20   0.9416           -1.995e+00   0.0355375        6.333e+01    0.0314187         6.532e+01    27          
## Christensenellaceae   IGKV4.1    0.9416           -2.095e+00   0.0136861        8.262e+01    0.0122000         8.472e+01    27          
## Christensenellaceae   IGLV2.14   0.9971           -3.446e-01   0.0246018        9.534e+01    0.0240120         9.569e+01    27          
## Christensenellaceae   IGLV3.1    0.9971           3.830e-01    0.0072158        1.042e+02    0.0082207         1.039e+02    27          
## Christensenellaceae   IGLV3.25   0.9416           -9.937e-01   0.0216603        5.322e+01    0.0205160         5.422e+01    27          
## Christensenellaceae   IGLV3.9    0.9416           1.095e+00    0.0005867        1.160e+02    0.0007508         1.149e+02    27          
## Christensenellaceae   IGLV6.57   0.9416           -2.047e+00   0.0509046        8.875e+01    0.0450188         9.079e+01    27          
## Ruminococcaceae       IGLV3.9    0.713            1.385e+00    0.04973          7.179e+00    0.1447            5.794e+00    27          
## Moraxellaceae         IGLV6.57   0.9715           4.216e+00    0.04961          -1.695e+01   0.8657            -2.117e+01   27          
## Brucellaceae          IGLV3.19   0.9605           1.395e+03    0.0291           4.879e+02    0.9911            -9.074e+02   27          
## Rhodospirillaceae     IGHV1.18   0.9906           7.108e+00    0.009582         3.648e+03    0.009528          3.640e+03    27          
## Rhodospirillaceae     IGHV3.21   0.9906           1.125e+00    0.030586         4.197e+03    0.030495          4.196e+03    27          
## Rhodospirillaceae     IGLV2.14   0.9906           -4.838e+00   0.036897         5.092e+03    0.036449          5.096e+03    27          
## Rhodospirillaceae     IGLV3.25   0.9906           -2.084e+01   0.051424         2.556e+03    0.048320          2.576e+03    27          
## Rhodospirillaceae     IGLV3.9    0.9906           2.983e-01    0.009582         5.215e+03    0.009528          5.215e+03    27

## 
## 
## Table: Vgenes RNAseq: HIV Interaction with genus
## 
## Parameter       Gene       Adjusted_P_Non   Slope_Non    Adjusted_P_HIV   Slope_HIV   Adjusted_P_Diff   Slope_Diff   Sample_Size 
## --------------  ---------  ---------------  -----------  ---------------  ----------  ----------------  -----------  ------------
## Coprococcus     IGHV3.49   0.9165           -1.442e+01   0.0795           4.573e+01   0.03409           6.015e+01    27          
## Acinetobacter   IGLV6.57   0.9713           4.523e+00    0.04905          -1.7e+01    0.8436            -2.152e+01   27

## 
## 
## Table: Vgenes RNAseq: HIV Interaction with species
## 
## Parameter             Gene       Adjusted_P_Non   Slope_Non   Adjusted_P_HIV   Slope_HIV    Adjusted_P_Diff   Slope_Diff   Sample_Size 
## --------------------  ---------  ---------------  ----------  ---------------  -----------  ----------------  -----------  ------------
## P. oris               IGHV1.3    0.59822          3.718e+00   0.04368          3.594e+01    0.07205           3.223e+01    27          
## P. oris               IGKV3.11   0.16955          3.409e+00   0.01600          1.745e+01    0.06534           1.404e+01    27          
## P. oris               IGLV2.23   0.01785          6.835e+00   0.11022          -1.419e+01   0.03813           -2.103e+01   27          
## P. oris               IGLV3.10   0.92822          4.479e-01   0.04368          3.140e+01    0.06534           3.095e+01    27          
## Blautia_glucerasei    IGHV1.18   0.9941           9.382e-01   0.04646          1.168e+02    0.05263           1.158e+02    27          
## Ruminococcus_bromii   IGLV1.44   0.8933           -1.11e+00   0.02393          7.214e+01    0.02161           7.325e+01    27
####### miseq #################
for (j in c("phylum", "family", "genus")) {
    clinical_sum <- matrix(NA, 0, 9)
    for (i in eval(as.name(j))) {
        output = lm_Reg_Interaction(miseq, Clin, i, "HIV")
        # summary table
        sum_a = output_sum(output, "MiSeq")
        clinical_sum = rbind(clinical_sum, sum_a)
    }
    print(kable(clinical_sum, caption = paste("Vgenes MiSeq: HIV Interaction with ", 
        sep = "", j)))
}

## 
## 
## Table: Vgenes MiSeq: HIV Interaction with phylum
## 
## Parameter     Gene      Adjusted_P_Non   Slope_Non   Adjusted_P_HIV   Slope_HIV    Adjusted_P_Diff   Slope_Diff   Sample_Size 
## ------------  --------  ---------------  ----------  ---------------  -----------  ----------------  -----------  ------------
## Tenericutes   iga_smu   0.04090          1.739e+02   0.01473          -6.346e+02   0.004171          -8.086e+02   27          
## Tenericutes   igl_smu   0.09032          1.712e+02   0.02705          -6.540e+02   0.011545          -8.252e+02   27

## 
## 
## Table: Vgenes MiSeq: HIV Interaction with family
## 
## Parameter             Gene      Adjusted_P_Non   Slope_Non    Adjusted_P_HIV   Slope_HIV    Adjusted_P_Diff   Slope_Diff   Sample_Size 
## --------------------  --------  ---------------  -----------  ---------------  -----------  ----------------  -----------  ------------
## Christensenellaceae   iga_smu   0.5177           1.204e+01    0.01844          -4.447e+02   0.01687           -4.567e+02   27          
## Rhodospirillaceae     iga_smu   0.89737          2.258e+01    0.01676          -2.250e+04   0.01661           -2.252e+04   27          
## Rhodospirillaceae     igl_smu   0.07047          -3.773e+02   0.01766          -2.289e+04   0.01920           -2.251e+04   27

## 
## 
## Table: Vgenes MiSeq: HIV Interaction with genus
## 
## Parameter       Gene      Adjusted_P_Non   Slope_Non    Adjusted_P_HIV   Slope_HIV    Adjusted_P_Diff   Slope_Diff   Sample_Size 
## --------------  --------  ---------------  -----------  ---------------  -----------  ----------------  -----------  ------------
## Coprococcus     iga_smu   0.04593          1.546e+02    0.006967         -3.210e+02   0.001313          -4.756e+02   27          
## Coprococcus     igk_smu   0.71469          2.753e+01    0.002595         -3.737e+02   0.005858          -4.013e+02   27          
## Coprococcus     igl_smu   0.30533          1.635e+02    0.072465         -2.777e+02   0.048302          -4.412e+02   27          
## Thalassospira   igl_smu   0.07232          -3.933e+02   0.03348          -2.662e+05   0.03368           -2.658e+05   27
######### 'species' ###############
clinical_sum <- matrix(NA, 0, 9)
for (i in species[-6]) {
    output = lm_Reg_Interaction(miseq, Clin, i, "HIV")
    # summary table
    sum_a = output_sum(output, "MiSeq")
    clinical_sum = rbind(clinical_sum, sum_a)
}

kable(clinical_sum, caption = paste("Vgenes MiSeq: HIV Interaction with species"))
Vgenes MiSeq: HIV Interaction with species
Parameter Gene Adjusted_P_Non Slope_Non Adjusted_P_HIV Slope_HIV Adjusted_P_Diff Slope_Diff Sample_Size
Blautia_glucerasei iga_smu 0.8974 1.364e+01 0.002807 -9.222e+02 0.004015 -9.359e+02 27
Blautia_glucerasei igk_smu 0.1062 -1.856e+02 0.033407 -6.528e+02 0.140293 -4.672e+02 27
Blautia_glucerasei igl_smu 0.2010 2.329e+02 0.008361 -9.294e+02 0.005640 -1.162e+03 27

0.4 Fix the non-negative zero-inflated continuous pattern of Microbiome Data

Clin[1:10, 24:28]
## # A tibble: 10 x 5
##    Proteobacteria Spirochaetes Tenericutes Verrucomicrobia Prevotellaceae
##             <dbl>        <dbl>       <dbl>           <dbl>          <dbl>
##  1        0.00732   0            0              0               0.0000264
##  2        0.0236    0            0              0               0.000300 
##  3        0.0831    0.00190      0.0000168      0.00364         0.000126 
##  4        0.0193    0            0.00414        0.00924         0.00163  
##  5        0.0453    0            0              0.000221        0.000157 
##  6        0.223     0.0000166    0              0.0413          0.0000828
##  7        0.0518    0.0121       0.0000851      0.00000567      0.00255  
##  8        0.0284    0.00000868   0              0.0187          0.000191 
##  9        0.0577    0.0000530    0              0               0.000212 
## 10        0.178     0.0000612    0              0.0665          0.193

0.5 Test of the function